########################
### Replication File for "The David Effect and ISDS"
### Puig, Strezhnev
########################
### Note: Be sure to set your working directory to the location of the two .csv data files

### Clear environment
rm(list=ls())

##### Libraries
library(Hmisc)
library(gtools)
library(gplots)
library(ggplot2)

#### Globals
nIter <- 5000
random.seed <- 1628 # Generated via "Random.org" [1-5000]

############################################
##### First Experiment
############################################

#################################
## Data pre-processing - Experiment 1
#################################

## Load in cleaned results file
data <- read.csv("experiment1_public.csv")

##### Note: -99 = missing

## How many initial respondents
sum(!is.na(data$ConsentForm)&data$ConsentForm!=2)

### Drop anyone who didn't answer 1 (Yes) to the consent form
data <- subset(data, !is.na(data$ConsentForm)&data$ConsentForm!=2)

## Make respondent ID a character
data$ResponseID <- as.character(data$ResponseID)

## Merge reimbursement questions
## Investor-State
data$isClaimantReimbursesAll[!is.na(data$isClaimantReimbursesAll2)] <- data$isClaimantReimbursesAll2[!is.na(data$isClaimantReimbursesAll2)]
data$isClaimantReimbursesSome[!is.na(data$isClaimantReimbursesSome2)] <- data$isClaimantReimbursesSome2[!is.na(data$isClaimantReimbursesSome2)]
data$isSplitExpenses[!is.na(data$isSplitExpenses2)] <- data$isSplitExpenses2[!is.na(data$isSplitExpenses2)]
data$isRespondentReimbursesSome[!is.na(data$isRespondentReimbursesSome2)] <- data$isRespondentReimbursesSome2[!is.na(data$isRespondentReimbursesSome2)]
data$isRespondentReimbursesAll[!is.na(data$isRespondentReimbursesAll2)] <- data$isRespondentReimbursesAll2[!is.na(data$isRespondentReimbursesAll2)]

## Investor-Investor
data$iiClaimantReimbursesAll[!is.na(data$iiClaimantReimbursesAll2)] <- data$iiClaimantReimbursesAll2[!is.na(data$iiClaimantReimbursesAll2)]
data$iiClaimantReimbursesSome[!is.na(data$iiClaimantReimbursesSome2)] <- data$iiClaimantReimbursesSome2[!is.na(data$iiClaimantReimbursesSome2)]
data$iiSplitExpenses[!is.na(data$iiSplitExpenses2)] <- data$iiSplitExpenses2[!is.na(data$iiSplitExpenses2)]
data$iiRespondentReimbursesSome[!is.na(data$iiRespondentReimbursesSome2)] <- data$iiRespondentReimbursesSome2[!is.na(data$iiRespondentReimbursesSome2)]
data$iiRespondentReimbursesAll[!is.na(data$iiRespondentReimbursesAll2)] <- data$iiRespondentReimbursesAll2[!is.na(data$iiRespondentReimbursesAll2)]

## Recode outcome levels to be more legible
levels(data$isOutcome)[grepl("respondent did NOT unfairly treat", levels(data$isOutcome))] <- "Respondent wins"
levels(data$isOutcome)[grepl("claims were manifestly without legal merit", levels(data$isOutcome))] <- "Claimant without legal merit"
levels(data$isOutcome)[grepl("present dispute was outside of the jurisdiction", levels(data$isOutcome))] <- "Outside jurisdiction"
levels(data$isOutcome)[grepl("respondent unfairly treated and wrongfully expropriated", levels(data$isOutcome))] <- "Claimant wins"

## Claimants/Respondents for legibility
levels(data$iiClaimant)[grepl("Fortune 500", levels(data$iiClaimant))] <- "Rich Company"
levels(data$iiClaimant)[grepl("modest but growing local", levels(data$iiClaimant))] <- "Developing Company"

levels(data$iiRespondent)[grepl("Fortune 500", levels(data$iiRespondent))] <- "Rich Company"
levels(data$iiRespondent)[grepl("modest but growing local", levels(data$iiRespondent))] <- "Developing Company"

levels(data$isRespondent)[grepl("middle income", levels(data$isRespondent))] <- "Middle Income"
levels(data$isRespondent)[grepl("low income", levels(data$isRespondent))] <- "Low Income"

levels(data$isClaimant)[grepl("middle income", levels(data$isClaimant))] <- "Middle Income"
levels(data$isClaimant)[grepl("high income", levels(data$isClaimant))] <- "High Income"

## Recode blank level as "blind appointment"
levels(data$isAppointer)[grepl("claimant", levels(data$isAppointer))] <- "Claimant"
levels(data$isAppointer)[grepl("respondent", levels(data$isAppointer))] <- "Respondent"
levels(data$isAppointer)[grepl("parties", levels(data$isAppointer))] <- "Parties"
levels(data$isAppointer)[grepl(".", levels(data$isAppointer), fixed=T)] <- "Blind"

####### Recode -99 as NA
data$genderMale[data$genderMale == -99] <- NA
data$genderFemale[data$genderFemale == -99] <- NA

####### Recode cost share variable 
data$isReimbursement <- NA
data$isReimbursement[data$isClaimantReimbursesAll == 1] <- 1
data$isReimbursement[data$isClaimantReimbursesSome == 1] <- 2
data$isReimbursement[data$isSplitExpenses == 1] <- 3
data$isReimbursement[data$isRespondentReimbursesSome == 1] <- 4
data$isReimbursement[data$isRespondentReimbursesAll == 1] <- 5

####### Recode cost share variable - Investor-investor
data$iiReimbursement <- NA
data$iiReimbursement[data$iiClaimantReimbursesAll == 1] <- 1
data$iiReimbursement[data$iiClaimantReimbursesSome == 1] <- 2
data$iiReimbursement[data$iiSplitExpenses == 1] <- 3
data$iiReimbursement[data$iiRespondentReimbursesSome == 1] <- 4
data$iiReimbursement[data$iiRespondentReimbursesAll == 1] <- 5

#####################################################
### Covariates of respondents

### Gender
data$genderMale[data$genderMale == -99] <- NA

### Employment
data$employmentPrivate[data$employmentPrivate == -99] <- NA
data$employmentProfessor[data$employmentProfessor == -99] <- NA
data$employmentGovt[data$employmentGovt == -99] <- NA
data$employmentOther[data$employmentOther== -99] <- NA
data$employmentCoded[data$employmentCoded == ""] <- NA

### Served on IS dispute?
data$isDisputeYes[data$isDisputeYes == -99] <- NA

######################################
### Treatment variables ##
######################################

### Who won the dispute?
data$isWinner <- as.factor(ifelse(data$isOutcome == "Claimant wins", "Claimant", "Respondent"))
data$isWinner <- relevel(data$isWinner, "Respondent")

### Conditional treatments - "Winning party is rich" vs. "Winning party is poor"

data$isWinnerRich <- ifelse((data$isWinner == "Respondent"&data$isRespondent == "Middle Income")|
                                     (data$isWinner == "Claimant"&data$isClaimant == "High Income"), "Rich", "Poor")
data$isWinnerRich <- as.factor(data$isWinnerRich)
## Losing party is rich v. losing party is poor
data$isLoserRich <- ifelse((data$isWinner == "Claimant"&data$isRespondent == "Middle Income")|
                                    (data$isWinner == "Respondent"&data$isClaimant == "High Income"), "Rich", "Poor")
data$isLoserRich <- as.factor(data$isLoserRich)

### Were the parties appointed by the winner vs. loser?
data$isAppointerSide <- as.character(data$isAppointer)
data$isAppointerSide[(data$isAppointer == "Claimant"& data$isWinner == "Claimant")|
                         (data$isAppointer == "Respondent"& data$isWinner == "Respondent")] <- "Appointed by Winner"
data$isAppointerSide[(data$isAppointer == "Claimant"& data$isWinner == "Respondent")|
                         (data$isAppointer == "Respondent"& data$isWinner == "Claimant")] <- "Appointed by Loser"
data$isAppointerSide <- as.factor(data$isAppointerSide)

data$isAppointerSide <- relevel(data$isAppointerSide, "Appointed by Loser")

### Reimbursement - reframe in terms of the winner of the dispute
data$isReimbursementFolded <- data$isReimbursement
data$isReimbursementFolded[data$isWinner == "Respondent"] <- 6 - data$isReimbursement[data$isWinner == "Respondent"]

###### Subset to units that finished the survey
completed.data <- subset(data, Finished == 1)

############################################
#### Effect of party resources
############################################

### Subset to cases where respondents answered the outcome question on costs
completed.data.full <- subset(completed.data, !is.na(completed.data$isReimbursementFolded))

### Number of observations in total
nrow(completed.data.full)

### Parse outcome into a bunch of binary indicators
completed.data.full$isSplit <- as.integer(completed.data.full$isReimbursementFolded == 3)
completed.data.full$isSome <- as.integer(completed.data.full$isReimbursementFolded == 4)
completed.data.full$isAll <- as.integer(completed.data.full$isReimbursementFolded == 5)
completed.data.full$isAny <- as.integer(completed.data.full$isReimbursementFolded == 4 | completed.data.full$isReimbursementFolded == 5)

######################
#### Estimate treament effects and bootstrap the standard errors
######################

#### Placeholder for holding point estimates and standard errors

results_frame_is <- data.frame(treatment=c(), outcome=c(), estimate=c(), standard_error = c(), pval = c())

######### Effect of winning party resources vs. losing party resources

### Set the random seed before sampling
set.seed(random.seed)

## Matrix to store marginal effects
PoorWin <- matrix(nrow=nIter, ncol=4)
PoorLose <- matrix(nrow=nIter, ncol=4)

## Make "Rich" the baseline level for both
completed.data.full$isWinnerRich <- relevel(completed.data.full$isWinnerRich, "Rich")
completed.data.full$isLoserRich <- relevel(completed.data.full$isLoserRich, "Rich")

### Sample sizes for each treatment condition
table(completed.data.full$isWinnerRich)
table(completed.data.full$isLoserRich)

# Bootstrap routine
for (iter in 1:nIter){
  
  # Resample rows of data
  if (iter != 1){ 
    indices <- sample(1:nrow(completed.data.full), nrow(completed.data.full), replace=T) 
    boot.data <- completed.data.full[indices,]
  }else{ # Keep the data the same for the first bootstrap iteration
    boot.data <- completed.data.full
  }
  
  # Difference-in-means (fully-saturated regression models)
  ### Investor-state vignette, winner rich v. winner poor
  boot_split <- lm(isSplit ~ isWinnerRich, data=boot.data)
  boot_some <- lm(isSome ~  isWinnerRich, data=boot.data)
  boot_all <- lm(isAll ~  isWinnerRich, data=boot.data)
  boot_any <- lm(isAny ~ isWinnerRich, data=boot.data)
  
  PoorWin[iter,] <- c(coefficients(boot_split)["isWinnerRichPoor"], coefficients(boot_some)["isWinnerRichPoor"], coefficients(boot_all)["isWinnerRichPoor"],coefficients(boot_any)["isWinnerRichPoor"])
  
  ### Investor-state vignette, loser rich v. loser poor
  boot_split <- lm(isSplit ~ isLoserRich, data=boot.data)
  boot_some <- lm(isSome ~  isLoserRich, data=boot.data)
  boot_all <- lm(isAll ~  isLoserRich, data=boot.data)
  boot_any <- lm(isAny ~ isLoserRich, data=boot.data)
  
  PoorLose[iter,] <- c(coefficients(boot_split)["isLoserRichPoor"], coefficients(boot_some)["isLoserRichPoor"], coefficients(boot_all)["isLoserRichPoor"],coefficients(boot_any)["isLoserRichPoor"])
  
}

## Point Estimate
PoorWin_point <- PoorWin[1,]
PoorLose_point <- PoorLose[1,]
## SD of bootstraps is our estimated SE
PoorWin_se <- apply(PoorWin, 2, function(x) sd(x))
PoorLose_se <- apply(PoorLose, 2, function(x) sd(x))
## CIs are quantiles of the bootstrap distribution
PoorWin_ci <- apply(PoorWin, 2, function(x) quantile(x, c(.025, .975)))
PoorWin_ci90 <- apply(PoorWin, 2, function(x) quantile(x, c(.05, .95)))
PoorLose_ci <- apply(PoorLose, 2, function(x) quantile(x, c(.025, .975)))
PoorLose_ci90 <- apply(PoorLose, 2, function(x) quantile(x, c(.05, .95)))
## P-value from SE estimate of bootstrap (sampling distribution is asymptotically normal, so this doesn't differ much from just taking quantiles)
PoorWin_pval <- sapply(PoorWin_point/PoorWin_se, function(x) 2*pnorm(-abs(x)))
PoorLose_pval <- sapply(PoorLose_point/PoorLose_se, function(x) 2*pnorm(-abs(x)))

### Output point/SE/CI estimates
round(PoorWin_point, 3)
round(PoorWin_se, 3)
round(PoorWin_pval, 3)

round(PoorLose_point, 3)
round(PoorLose_se, 3)
round(PoorLose_pval, 3)

### Store in regression tables

results_frame_is <- rbind(results_frame_is,
                            data.frame(treatment=c("Investor-State Winner Poor v. Rich","Investor-State Winner Poor v. Rich","Investor-State Winner Poor v. Rich","Investor-State Winner Poor v. Rich"), outcome=c("Split Costs", "Some Costs", "All Costs","Any Costs"), estimate=round(PoorWin_point, 3)[1:4], 
                                       standard_error = round(PoorWin_se, 3)[1:4], pval = round(PoorWin_pval, 3)[1:4]))

results_frame_is <- rbind(results_frame_is,
                            data.frame(treatment=c("Investor-State Loser Poor v. Rich","Investor-State Loser Poor v. Rich","Investor-State Loser Poor v. Rich","Investor-State Loser Poor v. Rich"), outcome=c("Split Costs", "Some Costs", "All Costs","Any Costs"), estimate=round(PoorLose_point, 3)[1:4], 
                                       standard_error = round(PoorLose_se, 3)[1:4], pval = round(PoorLose_pval, 3)[1:4]))

### Output
results_frame_is

########################################
#### Plot of marginal effects for Rich/Poor Winner vs. Loser cases - Investor-State
########################################

treat_winner_effect_is <- data.frame(choice = c("Split Costs", "Loser\nPays Some", "Loser\nPays All"), point= PoorWin[1,c(1:3)],
                                  lower95 = apply(PoorWin, 2, function(x) quantile(x, c(.025, .975)))[1,c(1:3)], upper95=
                                    apply(PoorWin, 2, function(x) quantile(x, c(.025, .975)))[2,c(1:3)])

treat_loser_effect_is <- data.frame(choice = c("Split Costs", "Loser\nPays Some", "Loser\nPays All"), point= PoorLose[1,c(1:3)],
                                 lower95 = apply(PoorLose, 2, function(x) quantile(x, c(.025, .975)))[1,c(1:3)], upper95=
                                   apply(PoorLose, 2, function(x) quantile(x, c(.025, .975)))[2,c(1:3)])


png("treat_winner_is.png", width=900, height=350)
p = ggplot(treat_winner_effect_is, aes(y=point, x=choice))
p = p + coord_flip()  
p = p + geom_hline(yintercept = 0,size=1,linetype="dashed") 
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 3)
p = p + scale_y_continuous(name="Change in the probability respondents select the outcome", limits=c(-.3, .3))
p = p + scale_x_discrete(name="")
#p = p + geom_text(data=mentions_results_lags_pvals, aes(x=x, y=y, label=label), size=4, colour=c("black"))
# colour scheme
theme_bw1 <- function(base_size = 20, base_family = "") {
  
  theme_bw(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.x =       element_text(size = base_size*.9, colour = "black"),
      axis.text.y =       element_text(size = base_size, colour = "black"), # changes position of X axis text
      
      axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90,vjust=.01,hjust=.1),
      plot.title =             element_text(face = "bold"),
      legend.position = "none"
    )
}
p = p + ggtitle("Winning party is lower income vs. winning party is higher income")
p = p + theme_bw1()
print(p)
dev.off()
ggsave("treat_winner_ii.pdf", width=12,height=4)

png("treat_loser_is.png", width=900, height=350)
p = ggplot(treat_loser_effect_is, aes(y=point, x=choice))
p = p + coord_flip()  
p = p + geom_hline(yintercept = 0,size=1,linetype="dashed") 
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 3)
p = p + scale_y_continuous(name="Change in the probability respondents select the outcome", limits=c(-.3, .3))
p = p + scale_x_discrete(name="")
#p = p + geom_text(data=mentions_results_lags_pvals, aes(x=x, y=y, label=label), size=4, colour=c("black"))
# colour scheme
theme_bw1 <- function(base_size = 20, base_family = "") {
  
  theme_bw(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.x =       element_text(size = base_size*.9, colour = "black"),
      axis.text.y =       element_text(size = base_size, colour = "black"), # changes position of X axis text
      
      axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90,vjust=.01,hjust=.1),
      plot.title =             element_text(face = "bold"),
      legend.position = "none"
    )
}
p = p + ggtitle("Losing party is lower income vs. losing party is higher income")
p = p + theme_bw1()
print(p)
dev.off()
ggsave("treat_loser_is.pdf", width=12,height=4)

########################################
#####################################################
### Investor-investor vignette
#####################################################
########################################

### Subset to cases where respondents answered the outcome question on costs
completed.data.ii.full <- subset(completed.data, !is.na(completed.data$iiReimbursement))

### Number of observations in total
nrow(completed.data.ii.full)

### Parse outcome into a bunch of binary indicators
completed.data.ii.full$iiSplit <- as.integer(completed.data.ii.full$iiReimbursement == 3)
completed.data.ii.full$iiSome <- as.integer(completed.data.ii.full$iiReimbursement == 4)
completed.data.ii.full$iiAll <- as.integer(completed.data.ii.full$iiReimbursement == 5)
completed.data.ii.full$iiAny <- as.integer(completed.data.ii.full$iiReimbursement== 4 | completed.data.ii.full$iiReimbursement == 5)

#### Placeholder for holding point estimates and standard errors

results_frame_ii <- data.frame(treatment=c(), outcome=c(), estimate=c(), standard_error = c(), pval = c())

######### Effect of winning party resources vs. losing party resources

### Set the random seed before sampling
set.seed(random.seed)

## Matrix to store marginal effects
ii_PoorWin <- matrix(nrow=nIter, ncol=4)
ii_PoorLose <- matrix(nrow=nIter, ncol=4)

## Make "Rich" the baseline level for both
completed.data.ii.full$iiClaimant <- relevel(completed.data.ii.full$iiClaimant, "Rich Company")
completed.data.ii.full$iiRespondent <- relevel(completed.data.ii.full$iiRespondent, "Rich Company")

# Bootstrap routine
for (iter in 1:nIter){
  
  # Resample rows of data
  if (iter != 1){ 
    indices <- sample(1:nrow(completed.data.ii.full), nrow(completed.data.ii.full), replace=T) 
    boot.data <- completed.data.ii.full[indices,]
  }else{ # Keep the data the same for the first bootstrap iteration
    boot.data <- completed.data.ii.full
  }
  
  # Difference-in-means (fully-saturated regression models)
  ### Investor-investor vignette, winner rich v. winner poor
  boot_split <- lm(iiSplit ~ iiClaimant, data=boot.data)
  boot_some <- lm(iiSome ~  iiClaimant, data=boot.data)
  boot_all <- lm(iiAll ~  iiClaimant, data=boot.data)
  boot_any <- lm(iiAny ~ iiClaimant, data=boot.data)
  
  ii_PoorWin[iter,] <- c(coefficients(boot_split)["iiClaimantDeveloping Company"], coefficients(boot_some)["iiClaimantDeveloping Company"], coefficients(boot_all)["iiClaimantDeveloping Company"],coefficients(boot_any)["iiClaimantDeveloping Company"])
  
  ### Investor-state vignette, loser rich v. loser poor
  boot_split <- lm(iiSplit ~ iiRespondent, data=boot.data)
  boot_some <- lm(iiSome ~  iiRespondent, data=boot.data)
  boot_all <- lm(iiAll ~  iiRespondent, data=boot.data)
  boot_any <- lm(iiAny ~ iiRespondent, data=boot.data)
  
  ii_PoorLose[iter,] <- c(coefficients(boot_split)["iiRespondentDeveloping Company"], coefficients(boot_some)["iiRespondentDeveloping Company"], coefficients(boot_all)["iiRespondentDeveloping Company"],coefficients(boot_any)["iiRespondentDeveloping Company"])
  
}

## Point Estimate
ii_PoorWin_point <- ii_PoorWin[1,]
ii_PoorLose_point <- ii_PoorLose[1,]
## SD of bootstraps is our estimated SE
ii_PoorWin_se <- apply(ii_PoorWin, 2, function(x) sd(x))
ii_PoorLose_se <- apply(ii_PoorLose, 2, function(x) sd(x))
## CIs are quantiles of the bootstrap distribution
ii_PoorWin_ci <- apply(ii_PoorWin, 2, function(x) quantile(x, c(.025, .975)))
ii_PoorWin_ci90 <- apply(ii_PoorWin, 2, function(x) quantile(x, c(.05, .95)))
ii_PoorLose_ci <- apply(ii_PoorLose, 2, function(x) quantile(x, c(.025, .975)))
ii_PoorLose_ci90 <- apply(ii_PoorLose, 2, function(x) quantile(x, c(.05, .95)))
## P-value from SE estimate of bootstrap (sampling distribution is asymptotically normal, so this doesn't differ much from just taking quantiles)
ii_PoorWin_pval <- sapply(ii_PoorWin_point/ii_PoorWin_se, function(x) 2*pnorm(-abs(x)))
ii_PoorLose_pval <- sapply(ii_PoorLose_point/ii_PoorLose_se, function(x) 2*pnorm(-abs(x)))

### Output point/SE/CI estimates
round(ii_PoorWin_point, 3)
round(ii_PoorWin_se, 3)
round(ii_PoorWin_pval, 3)

round(ii_PoorLose_point, 3)
round(ii_PoorLose_se, 3)
round(ii_PoorLose_pval, 3)

### Store in regression tables

results_frame_ii <- rbind(results_frame_ii,
                          data.frame(treatment=c("Investor-investor Claimant Poor v. Rich","Investor-investor Claimant Poor v. Rich","Investor-investor Claimant Poor v. Rich","Investor-investor Claimant Poor v. Rich"), outcome=c("Split Costs", "Some Costs", "All Costs","Any Costs"), estimate=round(ii_PoorWin_point, 3)[1:4], 
                                     standard_error = round(ii_PoorWin_se, 3)[1:4], pval = round(ii_PoorWin_pval, 3)[1:4]))

results_frame_ii <- rbind(results_frame_ii,
                          data.frame(treatment=c("Investor-investor Respondent Poor v. Rich","Investor-investor Respondent Poor v. Rich","Investor-investor Respondent Poor v. Rich","Investor-investor Respondent Poor v. Rich"), outcome=c("Split Costs", "Some Costs", "All Costs","Any Costs"), estimate=round(ii_PoorLose_point, 3)[1:4], 
                                     standard_error = round(ii_PoorLose_se, 3)[1:4], pval = round(ii_PoorLose_pval, 3)[1:4]))

results_frame_ii ## Investor-investor results table

########################################
#### Plot of marginal effects for Rich/Poor Winner vs. Loser cases - Investor-State
########################################

treat_winner_effect_ii <- data.frame(choice = c("Split Costs", "Loser\nPays Some", "Loser\nPays All"), point= ii_PoorWin[1,c(1:3)],
                                     lower95 = apply(ii_PoorWin, 2, function(x) quantile(x, c(.025, .975)))[1,c(1:3)], upper95=
                                       apply(ii_PoorWin, 2, function(x) quantile(x, c(.025, .975)))[2,c(1:3)])

treat_loser_effect_ii <- data.frame(choice = c("Split Costs", "Loser\nPays Some", "Loser\nPays All"), point= ii_PoorLose[1,c(1:3)],
                                    lower95 = apply(ii_PoorLose, 2, function(x) quantile(x, c(.025, .975)))[1,c(1:3)], upper95=
                                      apply(ii_PoorLose, 2, function(x) quantile(x, c(.025, .975)))[2,c(1:3)])


png("treat_winner_ii.png", width=900, height=350)
p = ggplot(treat_winner_effect_ii, aes(y=point, x=choice))
p = p + coord_flip()  
p = p + geom_hline(yintercept = 0,size=1,linetype="dashed") 
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 3)
p = p + scale_y_continuous(name="Change in the probability respondents select the outcome", limits=c(-.4, .4))
p = p + scale_x_discrete(name="")
#p = p + geom_text(data=mentions_results_lags_pvals, aes(x=x, y=y, label=label), size=4, colour=c("black"))
# colour scheme
theme_bw1 <- function(base_size = 20, base_family = "") {
  
  theme_bw(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.x =       element_text(size = base_size*.9, colour = "black"),
      axis.text.y =       element_text(size = base_size, colour = "black"), # changes position of X axis text
      
      axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90,vjust=.01,hjust=.1),
      plot.title =             element_text(face = "bold"),
      legend.position = "none"
    )
}
p = p + ggtitle("Winning party (claimant) is a small firm vs.\nwinning party (claimant) is a Fortune 500 firm")
p = p + theme_bw1()
print(p)
dev.off()
ggsave("treat_winner_ii.pdf", width=12,height=4)

png("treat_loser_ii.png", width=900, height=350)
p = ggplot(treat_loser_effect_ii, aes(y=point, x=choice))
p = p + coord_flip()  
p = p + geom_hline(yintercept = 0,size=1,linetype="dashed") 
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 3)
p = p + scale_y_continuous(name="Change in the probability respondents select the outcome", limits=c(-.4, .4))
p = p + scale_x_discrete(name="")
#p = p + geom_text(data=mentions_results_lags_pvals, aes(x=x, y=y, label=label), size=4, colour=c("black"))
# colour scheme
theme_bw1 <- function(base_size = 20, base_family = "") {
  
  theme_bw(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.x =       element_text(size = base_size*.9, colour = "black"),
      axis.text.y =       element_text(size = base_size, colour = "black"), # changes position of X axis text
      
      axis.ticks =        element_line(colour = "grey50"),
      axis.title.y =      element_text(size = base_size,angle=90,vjust=.01,hjust=.1),
      plot.title =             element_text(face = "bold"),
      legend.position = "none"
    )
}
p = p + ggtitle("Losing party (respondent) is a small firm vs.\nlosing party (respondent) is a Fortune 500 firm")
p = p + theme_bw1()
print(p)
dev.off()
ggsave("treat_loser_ii.pdf", width=12,height=4)

#######################################################################################
###### Compare sampled population to the target Population
#######################################################################################

####### Complete cases (rename the data)
folded.data <- completed.data.full

### Load dataset on the targeted arbitrators
target_arbitrators <- read.csv("icsid_2010-2016_arbitrators.csv")

### Rename variables/make comparable with our primary dataset
target_arbitrators$employmentCoded <- target_arbitrators$Profession.Primary
folded.data$Gender <- as.factor(ifelse(folded.data$genderMale, "M", "F"))
folded.data$employmentCoded <- factor(folded.data$employmentCoded)

target_arbitrators$legalOrigin <- NA
target_arbitrators$legalOrigin[target_arbitrators$anyCommon == 1 & target_arbitrators$anyCivil == 0] <- "Common Law"
target_arbitrators$legalOrigin[target_arbitrators$anyCommon == 0 & target_arbitrators$anyCivil == 1] <- "Civil Law"
target_arbitrators$legalOrigin[target_arbitrators$anyCommon == 1 & target_arbitrators$anyCivil == 1] <- "Both"

folded.data$legalOrigin <- NA
folded.data$legalOrigin[folded.data$anyCommon == 1 & folded.data$anyCivil == 0] <- "Common Law"
folded.data$legalOrigin[folded.data$anyCommon == 0 & folded.data$anyCivil == 1] <- "Civil Law"
folded.data$legalOrigin[folded.data$anyCommon == 1 & folded.data$anyCivil == 1] <- "Both"

folded.data$legalOrigin <- as.factor(folded.data$legalOrigin)

### Subset data to just the observations that had responses to all of the demographic questions.
folded.data.complete <- subset(folded.data, !is.na(folded.data$legalOrigin)&!is.na(folded.data$Gender)&!is.na(folded.data$employmentCoded))

#################### Distribution of gender

gender_freq <- data.frame(category = c("Female", "Male"),
                          point = as.integer(table(folded.data.complete$Gender)))

gender_freq <- cbind(gender_freq, binconf(x = gender_freq$point, n=sum(gender_freq$point)))

png("gender_frequencies_newsamp.png", width=400, height=350)
par(cex=1)
par(cex.main=1.3)
par(las=2)
par(mar=c(5,6,4,2)+0.4)
bplot <- barplot2(gender_freq$PointEst, horiz=T, width=.2,
                  names.arg=c(paste("Female\n(", gender_freq$point[1], " total)", sep=""), 
                              paste("Male\n(", gender_freq$point[2], " total)", sep="")), xlab = "Share of responses", lwd=2,
                  main=paste("Gender of respondents in sample, n=", sum(gender_freq$point), sep=""), xlim=c(0,1), plot.ci=T, ci.lwd=2, ci.width=.8, ci.color="grey20",
                  ci.l = gender_freq$Lower, 
                  ci.u = gender_freq$Upper )
points(y=bplot+.1, x=as.vector(table(target_arbitrators$Gender)/sum(table(target_arbitrators$Gender))), cex=2, pch=4, lwd=4)
segments(x0 = c(0,0), y0=bplot+.1, x1= as.vector(table(target_arbitrators$Gender)/sum(table(target_arbitrators$Gender))), y1 = bplot+.1, lty=2, lwd=4)
dev.off()
### PDF version
pdf("gender_frequencies_newsamp.pdf", width=5.4,height=4)
par(cex=1)
par(cex.main=1.3)
par(las=2)
par(mar=c(5,6,4,2)+0.4)
bplot <- barplot2(gender_freq$PointEst, horiz=T, width=.2,
                  names.arg=c(paste("Female\n(", gender_freq$point[1], " total)", sep=""), 
                              paste("Male\n(", gender_freq$point[2], " total)", sep="")), xlab = "Share of responses", lwd=2,
                  main=paste("Gender of respondents in sample, n=", sum(gender_freq$point), sep=""), xlim=c(0,1), plot.ci=T, ci.lwd=2, ci.width=.8, ci.color="grey20",
                  ci.l = gender_freq$Lower, 
                  ci.u = gender_freq$Upper )
points(y=bplot+.1, x=as.vector(table(target_arbitrators$Gender)/sum(table(target_arbitrators$Gender))), cex=2, pch=4, lwd=4)
segments(x0 = c(0,0), y0=bplot+.1, x1= as.vector(table(target_arbitrators$Gender)/sum(table(target_arbitrators$Gender))), y1 = bplot+.1, lty=2, lwd=4)
dev.off()


################## U.S. v. Non-US nationalities
us_freq <- data.frame(category = c("US", "NonUS"),
                      point = as.integer(table(folded.data.complete$anyUS==0)))
us_freq <- cbind(us_freq, binconf(x = us_freq$point, n=sum(us_freq$point)))

png("us_frequencies_newsamp.png", width=400, height=350)
par(cex=1)
par(cex.main=1.3)
par(las=2)
par(mar=c(5,6,4,2)+0.4)
bplot <- barplot2(us_freq$PointEst, horiz=T, width=.2,
                  names.arg=c(paste("U.S. National\n(", us_freq$point[1], " total)", sep=""), 
                              paste("Non-U.S.\nNational\n(", us_freq$point[2], " total)", sep="")), xlab = "Share of responses", lwd=2,
                  main=paste("Nationality of respondents\nin sample, n=", sum(us_freq$point), sep=""), xlim=c(0,1), plot.ci=T, ci.lwd=2, ci.width=.8, ci.color="grey20",
                  ci.l = us_freq$Lower, 
                  ci.u = us_freq$Upper )
points(y=bplot+.1, x=as.vector(table(as.integer(!grepl("U.S.", target_arbitrators$Nationality)))/length(target_arbitrators$Nationality)), cex=2, pch=4, lwd=4)
segments(x0 = c(0,0), y0=bplot+.1, x1= as.vector(table(as.integer(!grepl("U.S.", target_arbitrators$Nationality)))/length(target_arbitrators$Nationality)), y1 = bplot+.1, lty=2, lwd=4)
dev.off()
### PDF version
pdf("us_frequencies_newsamp.pdf", width=5.4, height=4)
par(cex=1)
par(cex.main=1.3)
par(las=2)
par(mar=c(5,6,4,2)+0.4)
bplot <- barplot2(us_freq$PointEst, horiz=T, width=.2,
                  names.arg=c(paste("U.S. National\n(", us_freq$point[1], " total)", sep=""), 
                              paste("Non-U.S.\nNational\n(", us_freq$point[2], " total)", sep="")), xlab = "Share of responses", lwd=2,
                  main=paste("Nationality of respondents\nin sample, n=", sum(us_freq$point), sep=""), xlim=c(0,1), plot.ci=T, ci.lwd=2, ci.width=.8, ci.color="grey20",
                  ci.l = us_freq$Lower, 
                  ci.u = us_freq$Upper )
points(y=bplot+.1, x=as.vector(table(as.integer(!grepl("U.S.", target_arbitrators$Nationality)))/length(target_arbitrators$Nationality)), cex=2, pch=4, lwd=4)
segments(x0 = c(0,0), y0=bplot+.1, x1= as.vector(table(as.integer(!grepl("U.S.", target_arbitrators$Nationality)))/length(target_arbitrators$Nationality)), y1 = bplot+.1, lty=2, lwd=4)
dev.off()

################### Frequencies of sample professions/backgrounds

background_freq <- data.frame(category = c("Academic", "Private", "Public"),
                              point = as.integer(table(folded.data.complete$employmentCoded)))
background_freq <- cbind(background_freq, binconf(x = background_freq$point, n=sum(background_freq$point)))

png("background_frequencies_newsamp.png", width=400, height=350)
par(cex=1)
par(cex.main=1.3)
par(las=2)
par(mar=c(5,6,4,2)+0.4)
bplot <- barplot2(background_freq$PointEst, horiz=T, width=.2,
                  names.arg=c(paste("Academic\n(", background_freq$point[1], " total)", sep=""), 
                              paste("Private\n(", background_freq$point[2], " total)", sep=""),
                              paste("Public\n(", background_freq$point[3], " total)", sep="")), xlab = "Share of responses", lwd=2,
                  main=paste("Employment of respondents\nin sample, n=", sum(background_freq$point), sep=""), xlim=c(0,1), plot.ci=T, ci.lwd=2, ci.width=.8, ci.color="grey20",
                  ci.l = background_freq$Lower, 
                  ci.u = background_freq$Upper )
points(y=bplot+.1, x=as.vector(table(target_arbitrators$employmentCoded)/sum(table(target_arbitrators$employmentCoded))), cex=2, pch=4, lwd=4)
segments(x0 = c(0,0), y0=bplot+.1, x1= as.vector(table(target_arbitrators$employmentCoded)/sum(table(target_arbitrators$employmentCoded))), y1 = bplot+.1, lty=2, lwd=4)
dev.off()
### PDF Version
pdf("background_frequencies_newsamp.pdf", width=5.4, height=4)
par(cex=1)
par(cex.main=1.3)
par(las=2)
par(mar=c(5,6,4,2)+0.4)
bplot <- barplot2(background_freq$PointEst, horiz=T, width=.2,
                  names.arg=c(paste("Academic\n(", background_freq$point[1], " total)", sep=""), 
                              paste("Private\n(", background_freq$point[2], " total)", sep=""),
                              paste("Public\n(", background_freq$point[3], " total)", sep="")), xlab = "Share of responses", lwd=2,
                  main=paste("Employment of respondents\nin sample, n=", sum(background_freq$point), sep=""), xlim=c(0,1), plot.ci=T, ci.lwd=2, ci.width=.8, ci.color="grey20",
                  ci.l = background_freq$Lower, 
                  ci.u = background_freq$Upper )
points(y=bplot+.1, x=as.vector(table(target_arbitrators$employmentCoded)/sum(table(target_arbitrators$employmentCoded))), cex=2, pch=4, lwd=4)
segments(x0 = c(0,0), y0=bplot+.1, x1= as.vector(table(target_arbitrators$employmentCoded)/sum(table(target_arbitrators$employmentCoded))), y1 = bplot+.1, lty=2, lwd=4)
dev.off()